script
% This 4th version is to process the full version of data with a bayesian-PCA.
%
% It also attempts to compare the results with the scores found by chris_pca3.m (those from partial data
% and classic PCA). It uses the variable COMPSOLD to check for the presence of "standard PCA" results
% in the workspace.

% Nov 01 2012: Revision
% Sep 25 2014: Adjusting for new data.
% Oct 04 2014: + qqplots; separate processing of subsets of data.
%               Sections removed; flag-switches introduced.
%               Lots of old analyses and visualizations removed as they all worked by overwriting data, and it's not helpful.
% Oct 07 2014: + age analysis.
% Oct 08 2014: Cosmetic changes in usability and readability.
% Oct 21 2014: The whole flag_ageVsDisco section
% Oct 30 2014: Correlations.
% Nov 28 2014: Projections of lightboxed tads into basic PCA fixed (signs).
% Nov 30 2014: Projections of lightboxed cells into basic PCA: now accommodating for missing data records.
% Jan 01 2015: Improvements in: Comparisons between stage and experience; stage analysis.
% Jan 03 2015: Circos revamped. + Varimax rotation.
% Jan 05 2015: Combining proj and scores (lightboxed and naive) in one variable (fullScores).
% Jan 06 2015: Age vs lightbox plot.
%               Also suddenly realized that the spiking threshold I used is measured in mV, not in pA. It's not the current,
%               but the Y-coordinate of a kink point on the first spike trace.
% Jan 07 2015: Lightbox analysis.
% Jan 08 2015: Individual correlations. New plots for stages-associated changes.
% Jan 09 2015: Comparing lightboxing effect in terms of full cluster size (new flag: flag_discoBlobs).
% Jan 14 2015: Cell location analysis.
% Apr 20 2015: Blob contours for disco blobs (within flag_Scores section)
% Jul 10 2015: Updated for new revision of data. Rearranged variables (variable labels) according to the
%               new sequence. Also a slightly different layout of the input file (extra header row).
% Jul 27 2015: Some rearrangement of blob (kde)-producing procedures, a functional copy of _pca4 program and a new flag for kernels. 
%               New functionality: variable exclusion from the PCA set based on [varExplained] variable that is now read
%               from the data file as well.
% Jul 28 2015: Messing up with automated promax rotation, standardizing components orientation.
% Aug 04 2015: Variation explained per group (local PCAs). Spearman correlations for the corr-plot.


%%% ----------------------- Control switches and constants:
%%% Each flag below turns one section of this script on (non-zero) or off (0).
%%% All of them are plain workspace variables, so they are re-set on every run of the script.

flag_ReadData                   = 0;        % Whether the Excel file needs to be re-read

flag_preProcessing              = 1;        % Pre-process the data (normalize, sort, rank etc.). Always keep it on.
flag_useFullData                = 0;        % Use both naive and discoboxed data (as opposed to only naive data)
flag_useOnly49s                 = 0;        % Use only the oldest tadpoles (stages 48-49). Both this and the previous flag can cause errors if a contradicting analysis (stage / disco) is turned on.
flag_rankTransform              = 0;        % Whether aggressive normalization (rank transformation) should be applied
flag_preProcessing_fig          = 0;        % Rank-transformation related figures (histograms)
flag_remove_uninformative       = 0;        % Remove uninformative variables (based on varExplained). Not actually a flag: set to the number of variables to remove.

flag_runPca                     = 0;        % Re-run PCA (takes some time, as it's Bayesian iterative)
flag_promax                     = 1;        % If post-PCA promax rotation is to be implemented

flag_2D_results                 = 0;        % Loading plot
flag_1D_results                 = 0;        % 1-D PCA visualizations and other optional stuff
flag_Scores                     = 0;        % Score-plot
flag_scores_kernels             = 0;        % Whether on top of score-plots you want to see corresponding blobs. Affects 2 sections of the code.
flag_stagePics                  = 0;        % Stage-dependencies (either with or without blobs)
flag_discoPics                  = 1;        % Discoboxing comparison (for s49 only)
flag_discoDiversity             = 0;        % Comparing variability of raw data (33D) for naive vs lightboxed
flag_ageVsDisco                 = 0;        % Compare variables that change with age to those changing with disco
flag_pattern_by_group           = 0;        % Variation explained, per group of cells
flag_correlations               = 0;        % Correlations, including the circos plot
flag_individualCorrs            = 0;        % Plots for individual correlations
flag_individual_correlations    = 0;        % Individual correlation plots (only significant)
flag_cell_locations             = 0;        % Look for cell locations
flag_cell_time                  = 0;        % Prep age in hours and circadian stuff


% If there is no DATA variable in the workspace, nothing downstream can work,
% so reading, pre-processing and PCA are all forced on regardless of the settings above.
if(~exist('data','var'))
    flag_ReadData = 1; flag_preProcessing = 1; flag_runPca = 1;     % Useful shortcut for the first run
end
% flag_preProcessing = 0; flag_runPca = 0;    % Manual shortcut for later runs...


% Color tables used by the plotting sections below.
stagecolors = cbrewer('div','RdYlBu',7);                    % 7 colors; the score plots map them onto stages 43-49 via caxis
discocolors = [stagecolors(end-1,:) ; [32 32 32]/256 ];     % 2 rows: row 1 is used for naive cells, row 2 (dark gray) for discoboxed cells
% stagecolors = pmkmp(7,'CubicYF');   stagecolors = stagecolors(end:-1:1,:);


%%% ----------------------- Read new data
% Either re-reads everything from the Excel workbook or keeps whatever is already in the workspace.
fprintf('\n');
if(~flag_ReadData)
    dispf('Using old data (environment)');
else
    dispf('Reading data from Excel');
    clear fileName sheetNames xlsNum xlsStr names temp id stage boxed daytime prepAge location data nVars nCells;

    fileName  = 'C:\Users\Arseny\Documents\4_Cells Classification\2015 data\Chris Input Table 150728.xlsx';
    sheetName = 'data';
    [xlsNum,xlsStr] = xlsread(fileName, sheetName);

    % Column labels come from the first text row (from column 8 on); numbers start at the 4th numeric row.
    names = xlsStr(1,8:end)';
    temp  = xlsNum(4:end,:);

    % Columns 1-7 are per-cell metadata, everything to the right is the main set of measurements.
    id       = temp(:,1);
    stage    = temp(:,2);
    boxed    = temp(:,3);           % Whether they were discoboxed
    daytime  = temp(:,4);
    prepAge  = temp(:,5);           % Age of the prep by the time the cell was recorded
    location = temp(:,[6 7]);       % X and Y in the tectum
    data     = temp(:,8:end);

    nVars  = length(names);
    nCells = length(id);
    dispf('Data read successfully');

    % Second sheet: total variance explained by each of the variables.
    [xlsNum,xlsStr] = xlsread(fileName, 'var_explained');
    varExplained = xlsNum;
end

% Short one-line variable labels for figures and print-outs (same order as the data columns).
namesFlat = { ...
    'Cm', 'Rm', 'Ra', 'I hold', 'Na thresh', ...
    'I Na', 'KS thresh', 'I Ks', 'KT thresh', 'I Kt', ...
    'Decay current', 'Spike thresh', 'Spike amplitude', 'Spike risetime', 'Spike width', ...
    'I best', 'N spikes, steps', 'Spike ISI, ms', 'Spike ISI accommodation', 'Spike accommodation', ...
    'N spikes, wave', 'Spiking resonance', 'Resonance width', 'Wave buildup', 'Wave decay', 'Jitter', ...
    'Synaptic resonance', 'Synaptic resonance width', 'Synaptic charge', 'Synaptic PPR', 'Monosynapticity', ...
    'Minis frequency', 'Minis amplitude'};


%%% ----------------------- Is all data to be used???

%%% SUBSET is a logical mask over cells (rows of DATA) that go into the PCA.
if(~flag_useFullData)
    subset = (boxed~=1);	dispf('Non-Discoboxed tads only');
else
    subset = ~isnan(id);	dispf('Full data');
end
if(flag_useOnly49s)
    % Further restrict the mask to the two oldest stages.
    subset = subset & ismember(stage,[48 49]);
    dispf('Only older tadpoles (stages 48-49)');
end

%%% ----------------------- Should we use all variables
% Drops the FLAG_REMOVE_UNINFORMATIVE variables with the lowest varExplained.
% NAMESFLAT is rebuilt at full length on every run of the script, while DATA, NAMES and NVARS
% survive in the workspace between runs (unless the Excel file is re-read). The section is therefore
% written to be safe to repeat: DATA/NAMES/NVARS are trimmed only if they are still full-sized.
if(flag_remove_uninformative>0)
    nAllVars = numel(varExplained);
    [~,order] = sort(varExplained);
    ranks = zeros(1,nAllVars);                  % Start from a fresh vector (never reuse a stale workspace copy)
    ranks(order) = 1:nAllVars;                  % ranks(i) = position of variable i in ascending order of varExplained
    goodOnes = ranks>flag_remove_uninformative; % Only ones with high varExplained remain

    if(size(data,2)==nAllVars)                  % Data is still full-sized: trim it
        data  = data(:,goodOnes);
        names = names(goodOnes);
        nVars = length(names);
        fprintf('Removing %d variables.\n',flag_remove_uninformative);
    elseif(size(data,2)==sum(goodOnes))         % Data was already trimmed by a previous run with the same setting
        fprintf('%d variables were already removed on a previous run; data left as is.\n',flag_remove_uninformative);
    else                                        % Trimmed with a different setting: the mask cannot be applied
        error('chris_pca:variableMismatch', ...
            'DATA has %d columns, but %d (full) or %d (trimmed) were expected. Re-read the data (flag_ReadData = 1).', ...
            size(data,2),nAllVars,sum(goodOnes));
    end
    namesFlat = namesFlat(goodOnes);            % Always needed, as namesFlat was just rebuilt at full length
end


%%% ----------------------- Pre-process data & distributions
% Produces two alternative inputs for the PCA:
%   dataN - every column centered and scaled to unit SD (NaNs ignored when estimating mean and SD);
%   dataR - every column rank-transformed to normal quantiles (NaNs passed through).

if(~flag_preProcessing)
    dispf('Data was NOT pre-processed before PCA');
else
    dispf('Brushing data before PCA');
    clear n m iv dMeans dStd dataN rank dataR;

    [n,m] = size(data);
    for(iv=1:m)                             % Per-column mean and SD over the available values only
        dMeans(iv) = mean(data(~isnan(data(:,iv)),iv));
        dStd(iv)   =  std(data(~isnan(data(:,iv)),iv));
    end
    % Unbias the data first, then normalize the variances
    dataN = bsxfun(@times,bsxfun(@plus,data,-dMeans),1./dStd);

    for(iv=1:m)                             % Rank-normalization, one column at a time
        g = ~isnan(data(:,iv));
        rank = tiedrank( data(g,iv) );
        p = rank / ( numel(rank) + 1 );     % +1 keeps the top rank below 1, so norminv never returns Inf
        dataR(:,iv) = data(:,iv);           % Start from a copy, so that NaNs stay where they were
        dataR(g,iv) = norminv( p, 0, 1 );   % Then replace the actual values with their normal scores
    end

    if(flag_preProcessing_fig)              % Diagnostic figures, one small panel per variable (4-by-9 grid)
        figure;                                 % Histograms of raw data (not even unbiased)
        for(q=1:nVars)
            g = ~isnan(data(:,q));
            n = sum(g);
            subplot(4,9,q);
            hist(data(g,q),floor(sqrt(n)));
            set(gca,'FontSize',5);
            title(names{q});
        end

        figure;                                 % Histograms of rank-transformed data
        for(q=1:nVars)
            g = ~isnan(dataR(:,q));
            n = sum(g);
            subplot(4,9,q);
            hist(dataR(g,q),floor(sqrt(n)));
            set(gca,'FontSize',5);
            title(names{q});
        end

        figure;                                 % Rank-transformed against z-scored values (qq-style plots)
        for(q=1:nVars)
            g = ~isnan(dataR(:,q));
            n = sum(g);
            subplot(4,9,q);
            plot(dataR(g,q),dataN(g,q),'.');
            set(gca,'FontSize',5);
            title(names{q});
        end
    end
end


%%% ---------------------------------------------- PCA -----------------------
% Fits a 2-component PCA (external function pca_full) to the cells selected by SUBSET,
% optionally applies a promax rotation, and then fixes the order and the signs of the two
% components so that repeated runs give comparable pictures.
% Outputs left in the workspace: A, S, Mu, V, cv, hp, lc (raw pca_full outputs), T (rotation matrix),
% comps (variables-by-2 loadings) and scores (cells-by-2 scores, for the cells in SUBSET only).

if(flag_runPca)    
    if(sum(subset)<length(subset))
        dispf('Note: running PCA on a subset of data only');
    end
    clear nComps A S Mu V cv hp lc comps scores turn1 turn2;
    dispf('PCA: Started...');
    nComps = 2;
    % pca_full receives variables in rows and cells in columns (hence the transposition).
    if(flag_rankTransform)
        [ A, S, Mu, V, cv, hp, lc ] = pca_full(dataR(subset,:)',nComps,'verbose',0);  
    else
        [ A, S, Mu, V, cv, hp, lc ] = pca_full(dataN(subset,:)',nComps,'verbose',0);  
    end
    %   The trick here is that it can be either dataN for normal data, or dataR for rank-transformed data
    % Here A = components (composition of variables; contribution of each component to individual variables)
    %   X(:,j) = Mu + A*S(:,j) + Noise
    %   S = prominence of these components in each measurement (aka scores)
    %   Mu = mean values
    %   V = Noise variance
    %   The rest of outputs are technical and I don't quite understand them (see help).
    dispf('PCA: iterations finished. Making sense of the results.');
    %fprintf('  Variance explained: %f\n',1-V/(V + sum(var(S'))));  % Looks like this one is not correct
    % NOTE(review): 1-V is a fraction of variance only if the input columns have unit variance - confirm.
    fprintf('  Variance explained: %f\n',1-V);

    comps = A;          % Loads. Variables run down; component N - to the right
    scores = S';        % Cell scores. Cells run down; component N - to the right

    % Because the components may end up with either sign, here I control for that.
    % Two reference variables are used: Cm (always taken from row 1) and 'N spikes, steps'
    % (looked up by name in namesFlat, as its row changes if variables were removed).
    indexForCm = 1;
    indexForNSpikes = 17;   % Default value
    [n,m] = size(dataN(subset,:));
    for(iVar=1:m)
        if(strcmp('N spikes, steps',namesFlat{iVar}))
            indexForNSpikes = iVar;
            fprintf('Basic spiking variable found at position %d.\n',iVar);
        end
    end
    
    % temp1 = comps*scores';    % Debugger (see the debugging figure below)
    
    if(flag_promax)
        [comps,T] = rotatefactors(comps(:,1:2),'Method','promax','Coeff',100);
    else
        T = eye(2); % Unity matrix
    end
    %%% B = rotatefactors(A,'Method','promax') rotates A to maximize the promax criterion, 
    %%% equivalent to an oblique Procrustes rotation with a target created by an orthomax rotation. 
    %%% Use the four orthomax parameters to control the orthomax rotation used internally by promax.    
    % Note that the three lines below run whether or not promax was applied (with T = eye(2) the
    % middle one changes nothing). Loadings and scores get the same 90-degree turn, so the product
    % comps*scores' is preserved.
    % NOTE(review): inv(T)*X could be written as T\X, which is the form MATLAB recommends.
    comps = [-comps(:,2) comps(:,1)];       % Correct most frequent post-promax configuration (by swapping them)
    scores = (inv(T)*scores')';             % Update the scores with promax rotation matrix
    scores = [-scores(:,2) scores(:,1)];    % And correct (swap) them. It USUALLY give a good result.
    
    % The spiking variable has to load more strongly on component 1 than on component 2.
    if(abs(comps(indexForNSpikes,1)) < abs(comps(indexForNSpikes,2)))   % Check if coordinates are flipped (swapped)
        comps = comps(:,[2 1]);                             % If so - try to swap them back
        scores = scores(:,[2 1]);
        fprintf('PCA coordinates had to be swapped\n');
    end
    
    % Sign normalization: each component is multiplied by the sign of its reference loading, which
    % makes that loading non-negative. A loading of exactly 0 would zero the whole component (sign(0)==0).
    turn1 = sign(comps(indexForNSpikes,1));             % 17 for mean N spikes: 1st component of Nspikes should be positive
    turn2 = sign(comps(indexForCm ,2));                 % 1 for Cm:             2nd component of Cm should be positive
    % turn3 = sign(comps(3,1));         	% 3 for Ra
    comps(:,1) = comps(:,1)*turn1;          % N Spikes gets a positive loading on C1 (to make C1 look like "Spikiness").
    comps(:,2) = comps(:,2)*turn2;          % Cm gets a positive loading on C2 (to make C2 look like "cell size").
    % comps(:,3) = comps(:,3)*turn3;        % Ra should be facing up.

    scores(:,1) = scores(:,1)*turn1;        % Reflect scores accordingly.
    scores(:,2) = scores(:,2)*turn2;
    % scores(:,3) = scores(:,3)*turn3;

    clear turn1 turn2 turn3;
    
    % temp2 = comps*scores';
    % figure; plot(temp1(:),temp2(:),'.'); title('Check if rotations were successful');
    % clear temp1 temp2;
end

%%% ----------------------- 2D plot of loadings (variables in the PCA axes)
% One labeled point per variable: x = loading on component 1, y = loading on component 2.

if(flag_2D_results)
    figure('Color','white');
    enum(comps(:,1),comps(:,2),namesFlat);

    %%% -- Some human-readable markup:
    hold on;
    if(0)   % Colored markers for a few hand-picked variables. Row numbers are hard-coded, so this would not work if variables were excluded
        markGroups = {[17 22 26], [6 8 10], [1 2 3]};
        markColors = {'red', [3 2 0]/3, 'black'};
        try
            for(iGroup=1:length(markGroups))
                q = markGroups{iGroup};
                plot(comps(q,1),comps(q,2),'o','MarkerEdgeColor','none','MarkerFaceColor',markColors{iGroup});
            end
        end
    end
    hold off;

    xlabel('Component 1');
    ylabel('Component 2');
    mygrid(gca);

    %%% Retired alternatives, kept for reference:

    % % Blobby circles without text, but blending - not useful at all
    % figure('Color','white');
    % hold on;
    % plot(comps(:,1),comps(:,2),'ko','MarkerSize',20);
    % plot(comps(:,1),comps(:,2),'ko','MarkerSize',19,'MarkerFaceColor','g','MarkerEdgeColor','none');
    % hold off;
    % xlabel('Component 1'); ylabel('Component 2');

    % % Components 1 and 3
    % figure;
    % enum(comps(:,1),comps(:,3),names);
    % xlabel('Component 1'); ylabel('Component 3');

    % % Scatter-plot matrices
    % plotmatrixgui(data2,scores(:,1:5));
    % plotmatrixgui(scores(:,1:5));
end


%%% ----------------------- Scores

%%% First some processing that should happen either way. After this section:
%%%   scores     - PCA coordinates of the cells the PCA was fitted on (or of the naive cells, if it was the full set);
%%%   proj       - PCA coordinates of the remaining cells;
%%%   fullScores - PCA coordinates of all cells, row-aligned with ID (and STAGE, BOXED etc.).
if(sum(subset)<length(subset))  % If PCA was run on a subset of data
    if(size(scores,1)~=sum(subset))
        error('chris_pca:staleScores', ...
            'SCORES has %d rows, but the current subset has %d cells. Re-run the PCA (flag_runPca = 1).', ...
            size(scores,1),sum(subset));
    end

    %%% Copy from below that explains the formula: proxy = (Mu(idat) + A(idat,:)*S)*dStd(idat) + dMeans(idat);
    %%% Out-of-subset cells have to go through the same transformation the PCA was fitted on.
    if(flag_rankTransform)
        goodData = dataR(~subset,:);
    else
        goodData = dataN(~subset,:);
    end
    if(1)                                                       % This section compensates for the incompleteness of data. If you want a STRICT projection, set flag to 0.
        %figure; myplot(goodData); title('Unprocessed good data');
        for(iCol = 1:size(goodData,2))                          % Based on the actual width, not on M that may be stale
            missing = isnan(goodData(:,iCol));
            if(all(missing))
                defaultValue = Mu(iCol);                        % Nothing to average: use the PCA mean, which makes this variable neutral
            else
                defaultValue = mean(goodData(~missing,iCol));   % Average of the cells that do have this variable
            end
            goodData(missing,iCol) = defaultValue;
        end
        %figure; myplot(goodData); title('Processed good data');
    end
    proj = linsolve(comps,bsxfun(@plus,goodData,-Mu')')';       % Solving a linear equation for component proxies
                                                                % Among other things, shifting disco data in the same way as naive data was shifted (Mu)

    %%% Combined set of scores and proj. Rows of SCORES follow the cells of SUBSET in their original
    %%% order, and rows of PROJ follow the cells of ~SUBSET, so masks can be used directly
    %%% (no look-up by id, which would fail on missing or repeated ids).
    fullScores = nan(length(id),2);
    fullScores(subset,:)  = scores(:,1:2);
    fullScores(~subset,:) = proj(:,1:2);
else % PCA was run on full data
    %%% Split is needed on the first run, and also whenever SCORES still covers all cells
    %%% (that is right after the PCA was re-run on the full set, even if an old PROJ exists).
    if(~exist('proj','var') || size(scores,1)==length(id))
        fprintf('Full set: internal variable split\n');
        fullScores = scores;
        scores = fullScores(boxed~=1,:);    % To make it compatible with the rest of the code
        proj = fullScores(boxed==1,:);
    end
end

if(flag_Scores)
    % Three score plots drawn in a shared coordinate frame: cells the PCA describes directly (SCORES),
    % projected cells (PROJ), and stage 48-49 cells of both kinds on top of each other.
    clear ha1 ha2 ha3 myXLim1 myYLim1 myXLim2 myYLim2 g1 g2 goodData goodColumns defaultValue iCol;

    %%% Retired k-means clustering of the scores:
    %nClusters = 3;
    %cid = kmeans(scores,nClusters,'dist','sqeuclidean');
    %figure;
    %scatter(scores(:,1),scores(:,2),50,cid,'Filled');
    %xlabel('C1'); ylabel('C2');

    %%% Which cells the rows of SCORES stand for (rows of PROJ are then the rest)
    if(sum(subset)<length(subset))
        localsubset = subset;           % PCA was run on a subset: plot whatever the larger program is doing
    else
        localsubset = (boxed~=1);       % Full set: split into naive and discoboxed to plot something meaningful
    end

    %%% ---- Figure 1: scores colored by stage
    figure('Color','white');
    ha1 = axes();
    if(sum(subset)==length(subset))
        hold on;
    end
    scatter(scores(:,1),scores(:,2),50,stage(localsubset),'Filled','MarkerEdgeColor',[1 1 1]*0.5);
    % scatter(proj(:,1),proj(:,2),50,[1 1 1]*0.4,'Filled','MarkerEdgeColor',[1 1 1]*0.5);
    % set(ha1,'DataAspectRatio',[1 1 1]);
    caxis([43-0.5 49.5]);
    colormap(stagecolors);
    colorbar();
    title('Control population');
    myXLim1 = get(gca,'XLim');
    myYLim1 = get(gca,'YLim');

    %%% ---- Figure 2: projected (out-of-subset) cells, usually the stimulated ones, colored by stage
    figure('Color','white');
    ha2 = axes();
    scatter(proj(:,1),proj(:,2),50,stage(~localsubset),'Filled','MarkerEdgeColor',[1 1 1]*0.5);
    % set(ha2,'DataAspectRatio',[1 1 1]);
    caxis([43-0.5 49.5]);
    colormap(stagecolors);
    colorbar();
    title('Treatment population');
    myXLim2 = get(gca,'XLim');
    myYLim2 = get(gca,'YLim');

    %%% Smallest frame that contains both clouds; used for all three axes
    sharedXLim = [min(myXLim1(1),myXLim2(1)) max(myXLim1(2),myXLim2(2))];
    sharedYLim = [min(myYLim1(1),myYLim2(1)) max(myYLim1(2),myYLim2(2))];
    set(ha1,'XLim',sharedXLim,'YLim',sharedYLim);
    set(ha2,'XLim',sharedXLim,'YLim',sharedYLim);
    drawnow;
    mygrid(ha2);

    %%% ---- Figure 3: discoboxed cells shown on top of naive cells (stages 48 and 49 only)
    figure('Color','white');
    ha3 = axes;
    hold on;
    g1 = ismember(stage(localsubset),[48 49]);
    scatter(scores(g1,1),scores(g1,2),50,ones(sum(g1),1),'Filled','MarkerEdgeColor','none');
    g2 = ismember(stage(~localsubset),[48 49]);
    scatter(proj(g2,1),proj(g2,2),50,2*ones(sum(g2),1),'Filled','MarkerEdgeColor','none');
    colormap(discocolors); legend;
    colorbar();
    hold off;
    set(ha3,'XLim',sharedXLim,'YLim',sharedYLim);
    mygrid(ha3);

    if(flag_scores_kernels)
        % Density contours for both clouds, computed over the frame of figure 3
        xLimBig = get(ha3,'XLim');
        yLimBig = get(ha3,'YLim');

        figure;
        [bandwidth,density,xz,yz] = kde2d(scores(g1,:),20,[xLimBig(1) yLimBig(1)],[xLimBig(2) yLimBig(2)]);
        [~,hCounters] = contourf(xz,yz,density,5);
        title('Original cluster');

        figure;
        [bandwidth,density,xz,yz] = kde2d(proj(g2,:),20,[xLimBig(1) yLimBig(1)],[xLimBig(2) yLimBig(2)]);
        [~,hCounters] = contourf(xz,yz,density,5);
        title('Lightboxed cluster');
    end

    %%% --- Quantify blob sizes for naive and lightboxed cells (switched off)
    if(0)
        clear sets iset bag counter c1 c2 x y n;
        for(iset=1:2)
            if(iset==1)
                x = scores(g1,1);
                y = scores(g1,2);
            else
                x = proj(g2,1);
                y = proj(g2,2);
            end
            n = length(x);
            bag{iset} = zeros(n*(n-1)/2,1);   % One slot for every pair of cells
            counter = 0;
            for(c1=2:n)
                for(c2=1:(c1-1))
                    counter = counter+1;
                    bag{iset}(counter) = sqrt((x(c1)-x(c2))^2 + (y(c1)-y(c2))^2);
                end
            end
        end
        figure; compare_columns(concatenan(bag{1:2}),{'Naive','Stimulated'},'pub',[],50);   % --- In case you want to compare cluster sizes
        fprintf('Size of naive cluster: %f\n',mean(bag{1}));
        fprintf('Size of lightboxed cluster: %f\n',mean(bag{2}));
        title('Change of blob size with stage');
    end
    %mygrid(ha1);

    %figure; enum(scores(:,1),scores(:,2),id); xlabel('C1'); ylabel('C2');   % Enumeration
    %typicalCells = [18001 25004 27005 10001];   % right, bottom, left, top
end


%%% ----------------------- Advanced PCA visualizations
% NOTE: everything inside this section is currently commented out, so turning flag_1D_results on
% has no effect. The snippets are kept as templates (reconstruction test, print-out of scores,
% 1D plots of loadings). Some of them refer to a 3rd component, while the PCA section above sets nComps = 2.
if(flag_1D_results)
    %%% ----------------------- Reconstruction test
    % Compares one original variable to its reconstruction from the PCA model.

    % idat = 31;   % Which value to reconstruct. SpikeN, spike-shape and ionic currents are reconstructed pretty well
    % proxy = (Mu(idat) + A(idat,:)*S)*dStd(idat) + dMeans(idat);
    % figure; plot(data(:,idat),proxy,'.'); set(gca,'DataAspectRatio',[1 1 1]); xlabel('Data'); ylabel('Reconstruction'); title(names{idat});
    

    %%% ----------------------- PCA print-outs
    % Prints cell id and scores, one cell per line.

    % fprintf('\n');
    % for(q=1:n) % for each cell
    %    fprintf('%7d \t%f \t%f \t%f\n',id(q),scores(q,1), scores(q,2), scores(q,3));
    % end

    %%% ----------------------- 1D Plots of loads
    % One panel per component; variables with |loading| > 0.2 are labeled next to their points,
    % all others are labeled in light gray at zero.

    % figure; % Impact of variables into components
    % %subplot(1,4,1); enum(ones(1,m)*0.5,1:m,names); ylim([1 m]);
    % for(q=1:nComps)
    %     subplot(1,nComps,q);
    %     %subplot(1,5,(q-1)*2+1); % Spread a bit more
    %     hold on;
    %     plot(comps(:,q),1:m,'o-b');
    %     %plot(A(:,q),1:m,'.-r'); % Bayesian PCA
    %     ylim([1 m]);
    %     grid on;
    %     title(sprintf('Component %d',q));
    %     set(gca,'YTick',[]);
    %     for(ic = 1:m)
    %         if(abs(comps(ic,q))>0.2)
    %             if(comps(ic,q)>0)
    %                 text(comps(ic,q)*1.1,ic,names{ic},'FontSize',8,'HorizontalAlignment','left');
    %             else
    %                 text(comps(ic,q)*1.1,ic,names{ic},'FontSize',8,'HorizontalAlignment','right','Color','r');
    %             end
    %         else
    %             text(0,ic,names{ic},'FontSize',8,'HorizontalAlignment','center','Color',[1 1 1]*0.8);
    %         end
    %     end
    %     % if(q==3)
    %     %     legend('PCA','Bayes');
    %     % end    
    %     hold off;
    % end
end


%%% ----------------------- Stage dependencies
% For every variable: tests whether it changes with developmental stage (ANOVA on pooled stage groups,
% correlation with stage, "peak at stage 47" t-test, early-vs-late variance test) and prints one
% '$'-separated row per variable. Also fills:
%   gNo / gUp / gDn - indices of variables with no significant / positive / negative correlation with stage;
%   signAge         - sign of the correlation with stage for every variable.
% With flag_scores_kernels on, it also draws density blobs of scores for each stage group.
if(flag_stagePics)
    clear n m q gNo gUp gDn temp tmean terr g signhill peak47 pt pc p udflag a43 a45 a47 a49 s43 s45 s47 s49;
    dispf('-- Analyzing developmental stages --');
    
    [n,m] = size(data(subset,:));
    gNo = [];   % Group of variables that didn't change
    gUp = [];   % Group of variables that went up
    gDn = [];   % Group of variables that went down
    signAge = nan(1,m);                 % Fresh vector, so that no values from previous runs survive

    % Look-up table: stage -> pooled stage group, for ANOVA. Stage 42 is pooled with 43-44,
    % the same way as in all other groupings of this section ([42 43 44], [45 46], [47], [48 49]).
    stageRange = zeros(1,49);
    stageRange([42 43 44 45 46 47 48 49]) = [43 43 43 45 45 47 49 49];

    fprintf('Below UD stands for up/down, and HP stands for hill/pit.\n');
    fprintf('Name                        \tga\tud\t47\n');
    for(q=1:m)      % For every variable
        locName = strrep(names{q},char(10),' ');    % Labels may contain line breaks; replace them with spaces
        p = anova1(data(subset,q)',stage(subset)','off');       % ANOVA on individual stages (only used by a commented print-out below)
        g = subset & ~isnan(data(:,q));             % Good values that would work with correlation
        [rho,pc] = corr(data(g,q),stage(g));
        signAge(q) = sign(rho);                     % Whether it grows or decreases overall (to be used later)
        [~,pt] = ttest2(data(g & ismember(stage,[45 46 47]),q),data(g & ismember(stage,[43 44 48 49]),q));
        signhill = sign(mean(data(g & ismember(stage,[45 46 47]),q))-mean(data(g & ismember(stage,[43 44 48 49]),q)));
        [~,peak47] = ttest2(data(g & ismember(stage,[47]),q),data(g & ismember(stage,[46 48]),q));      % Whether peak at s47 is significant
        sign4647 = sign(mean(data(g & ismember(stage,[47]),q))-mean(data(g & ismember(stage,[46]),q))); % Change from s46 to 47
        sign4748 = sign(mean(data(g & ismember(stage,[48]),q))-mean(data(g & ismember(stage,[47]),q))); % Similar change from 47 to 48
        [~,pvar] = vartest2(data(g & ismember(stage,[45 46]),q),data(g & ismember(stage,[48 49]),q));   % Var test
        panova = anova1(data(g,q),stageRange(stage(g)),'off');  % ANOVA on pooled stage groups

        % Mean and SD for each pooled stage group
        a43 = mean(data(g & ismember(stage,[42 43 44]),q));
        a45 = mean(data(g & ismember(stage,[45 46]),q));
        a47 = mean(data(g & ismember(stage,[47]),q));
        a49 = mean(data(g & ismember(stage,[48 49]),q));
        s43 = std(data(g & ismember(stage,[42 43 44]),q));
        s45 = std(data(g & ismember(stage,[45 46]),q));
        s47 = std(data(g & ismember(stage,[47]),q));
        s49 = std(data(g & ismember(stage,[48 49]),q));

        if(~(pc<0.05))      % Not significant. Written this way to also catch pc = NaN (undefined correlation)
            gNo = [gNo; q];
        else            
            if(rho>0)
                gUp = [gUp; q];
            else
                gDn = [gDn; q];
            end
        end
        if(1)    % Print all. Alternatively, only pay ANOVA-significant ones (see ELSE section below)
            if((pc<0.05) && (rho>0))
                udflag = 'U';
            elseif((pc<0.05) && (rho<0))
                udflag = 'D';
            else
                udflag = '';
            end
            %fprintf('%30s\t%3s\t%3s\t%3d\t%3d\t%3d\n',locName, myst(panova,1), udflag, (pt<0.05)*(signhill>0), (pt<0.05)*(signhill<0), (peak47<0.05)*(sign4647*sign4748<0));
            %fprintf('%30s\t%3s\t%3s\t%3s',locName, myst(panova,1), udflag, myst(peak47,1));
            % Row layout: name, ANOVA p, correlation p, s47-peak p, then mean and SD for each of the 4 stage groups
            fprintf('%30s$%3s$%3s$%3s$',locName, myst(panova,1), myst(pc,1), myst(peak47,1));
            fprintf('%5.1f$%5.1f$',a43,s43);
            fprintf('%5.1f$%5.1f$',a45,s45);
            fprintf('%5.1f$%5.1f$',a47,s47);
            fprintf('%5.1f$%5.1f$',a49,s49);
            if(pvar<0.05)   % Variance differs between s45-46 and s48-49: report it with the direction of the change
                fprintf('%3s',myst(pvar,1));
                if(s49>s45); fprintf('$UP'); else; fprintf('$down'); end;
            end
            fprintf('\n');
        else
            if(panova<0.05) % Print only those that change
                fprintf('%30s\t%4.2f\t%s\t%d\n',locName,rho,myst(pc),sum(g));
            end
        end
        %fprintf('%30s: anova: %s\t r: %s\n',locName,myst(p),myst(pc));
    end
    
    %ageVarList = 1:m;  % All variables
    ageVarList = [1 5 9 10 12 14 15 16 17 22 24 30];
    onlyplus = [1;1;1;0;0;1;0;1;0;1;1;0;1;1;1;0;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1]; % Which variables are strictly positive
    if(0)   % Plot Changes with age and save it as a PDF
        stageGroups = {[42 43 44],[45 46],[47],[48 49]};                % Simplified staging
        stageNames = {'43-44','45-46','47','48-49'};
        stagex = [43.5 45.5 47 48.5]-43;
        stageRange = zeros(1,49);
        stageRange([42 43 44 45 46 47 48 49]) = [43 43 43 45 45 47 49 49];    % for ANOVA
        for(q=1:length(ageVarList))
            iVar = ageVarList(q);
            if(mod(q,12)==1)    % 12 panels per figure
                figure('Color','white');
            end
            g = (boxed==0) & ~isnan(data(:,iVar));
            for(iStage = 1:length(stageGroups))                
                tmean(iStage) = mean(data(g & ismember(stage,stageGroups{iStage}),iVar));
                terr(iStage)  =  std(data(g & ismember(stage,stageGroups{iStage}),iVar));
            end
            subplot(4,3,mod(q-1,12)+1); hold on;
            ribbonplot(stagex,tmean,terr);
            %plot(43:49,tmean,'.-');
            %errorbar(43:49,tmean,terr);
            hold off;
            set(gca,'FontSize',6,'XLim',[0 6],'XTick',stagex,'XTickLabel',stageNames);
            locName = namesFlat{iVar};
            if(onlyplus(iVar)>0)
                set(gca,'YLim',[0 +Inf]);
            end
            
            pval = anova1(data(g,iVar),stageRange(stage(g)),'off');
            
            ylabel(locName,'FontSize',8);
            % title(sprintf('p=%s',myst(pval)));
        end
        drawnow();
        fprintf('Saving the figure as a pdf... ');
        %set(hF,'PaperPosition',[0 0 8 6]);
        print(gcf,'-dpdf','-r600','C:\Users\Arseny\Documents\4_Cells Classification\Figures AI\fig-Age-draft.pdf')
        fprintf('Done!\n');
    end
    
    if(0) % Figures of best
        figure('Color','white');    % Variables that go up with stage
        bestn = ceil(sqrt(length(gUp)));
        for(iLine=1:length(gUp))
            for(iStage = 1:(49-43+1))
                g = subset & (stage==(43-1+iStage)) & ~isnan(data(:,gUp(iLine)));
                tmean(iStage) = mean(data(g,gUp(iLine)));
                terr(iStage)  =  std(data(g,gUp(iLine)));
            end
            subplot(bestn,bestn,iLine); hold on;
            plot(43:49,tmean,'.-');
            errorbar(43:49,tmean,terr);
            hold off;
            set(gca,'FontSize',8);
            locName = strrep(names{gUp(iLine)},char(10),'');
            title(locName);
        end

        figure('Color','white');    % Variables that go down with stage
        bestn = ceil(sqrt(length(gDn)));
        for(iLine=1:length(gDn))
            for(iStage = 1:(49-43+1))
                g = subset & (stage==(43-1+iStage)) & ~isnan(data(:,gDn(iLine)));
                tmean(iStage) = mean(data(g,gDn(iLine)));
                terr(iStage)  =  std(data(g,gDn(iLine)));
            end
            subplot(bestn,bestn,iLine); hold on;
            plot(43:49,tmean,'.-');
            errorbar(43:49,tmean,terr);
            hold off;
            set(gca,'FontSize',8);
            locName = strrep(names{gDn(iLine)},char(10),'');
            title(locName);
        end
    end
    
    if(flag_scores_kernels) %%% --- Blobs movement over stages        
        % NOTE: this part takes rows of SCORES to be the cells with boxed~=1, in their original order.
        pairwise_distances = [];    % Two columns: stage group number, distance between a pair of cells of this group
        stageGroups = {[42 43 44],[45 46],[47],[48 49]};
        stageGroupName = {'42-44','45-46','47','48-49'};
        stage_grouping = nan(sum(boxed~=1),1);
        figure;                
        plot(scores(:,1),scores(:,2),'.');      % Temporary plot of all points, only to get common axis limits
        xLimBig = get(gca,'XLim'); yLimBig = get(gca,'YLim');
        for(iStage=1:4)
            subplot(2,2,iStage);            
            g = ismember(stage(boxed ~= 1),stageGroups{iStage});
            stage_grouping(g) = iStage;     % Preparing for anova downstream
            [bandwidth,density,xz,yz] = kde2d(scores(g,:),20,[xLimBig(1) yLimBig(1)],[xLimBig(2) yLimBig(2)]);        
            [~,hCounters] = contourf(xz,yz,density,5);
            patches = findobj(gca,'type','patch');  % Re-color contour levels, if they are present as patch objects
            for(q=1:length(patches))
                set(patches(q),'FaceColor',[1 0.5 0] + [0 0.5 1]*q/length(patches),'EdgeColor',0.9*[1 1 1]*q/length(patches));
            end
            hold on;             
            plot(xLimBig,[0 0],':','Color',[1 1 1]*0.4);    % Zero lines
            plot([0 0],yLimBig,':','Color',[1 1 1]*0.4);
            plot(scores(g,1),scores(g,2),'k.');             % Individual cells
            hold off;
            set(gca,'XLim',xLimBig,'YLim',yLimBig,'FontSize',8);
            title(stageGroupName{iStage},'FontSize',8);
            
            gi = find(g);
            for(cell1=2:length(gi))     % All pairwise distances within this stage group
                for(cell2=1:cell1-1)
                    pairwise_distances = [pairwise_distances; 
                        iStage sqrt((scores(gi(cell1),1)-scores(gi(cell2),1))^2 + (scores(gi(cell1),2)-scores(gi(cell2),2))^2)];
                end
            end
        end
        [p,anovatab] = anova1(pairwise_distances(:,2),pairwise_distances(:,1),'off');
        fval = anovatab{2,5};
        fprintf('F-val for blob size: %5.1f\n',fval);
        [p,anovatab] = anova1(scores(:,1),stage_grouping,'off');
        fval = anovatab{2,5};
        fprintf('F-val for blob position along C1: %5.1f\n',fval);
    end
    
end



%%% ----------------------- Discoboxing comparisons
if(flag_discoPics)
    % Compares naive (boxed~=1) and discoboxed (boxed==1) cells of late stages:
    %   1) per-variable rank-sum and two-sample variance tests, printing the significant ones;
    %   2) a table of mean / SD / N for every variable;
    %   3) one plot per variable with a significant rank-sum test;
    %   4) a comparison of within-group pairwise distances in the first two columns of fullScores.
    % Leaves signDisco (sign of "disco minus naive" mean difference per variable) in the workspace.
    clear g1 g2 n m q p bag;
    dispf('-- Analyzing discoboxing --');
    if(1)
        dispf('Both s48 and s49');
        g1 = (boxed~=1) & ((stage==49) | (stage==48));
        g2 = (boxed==1) & ((stage==49) | (stage==48));
    else
        dispf('Stage s49 only (stage s48 excluded)');
        g1 = (boxed~=1) & (stage==49);
        g2 = (boxed==1) & (stage==49);
    end
    fprintf('N naive tadpoles: %d; N discoboxed tadpoles: %d\n',sum(g1),sum(g2));
    [n,m] = size(data(subset,:));
    bag = [];                           % Data columns for which it was significant
    signDisco = nan(1,m);               % Start fresh, so a longer vector left by an earlier run cannot survive
    pRank = nan(1,m);                   % Rank-sum p-values, reused by the summary table below
    for(q=1:m)                          % For each variable
        gn = ~isnan(data(:,q));
        signDisco(q) = sign(mean(data(gn & g2,q))-mean(data(gn & g1,q)));  % Disco minus control: sign of the change
        p = ranksum(data(gn & g1,q),data(gn & g2,q));
        pRank(q) = p;
        [~,pvar] = vartest2(data(gn & g1,q),data(gn & g2,q));        
        if(p<0.05)
            fprintf('%30s \t%6s \t n =  \t%d \t%d  \t: median change from/to: \t %f \t%f\n',namesFlat{q},myst(p),...
                sum(gn & g1),sum(gn & g2),median(data(gn & g1,q)),median(data(gn & g2,q)));
            bag = [bag; q];
        end
        if(pvar<0.05)
            fprintf('%30s \t%6s \t n =  \t%d \t%d  \t: variance change from/to: \t %f \t%f\n',namesFlat{q},myst(pvar),...
                sum(gn & g1),sum(gn & g2),var(data(gn & g1,q)),var(data(gn & g2,q)));
            %bag = [bag; q];
        end
    end
    dispf('FDR was not applied to the comparisons above.');
    
    fprintf('\n%30s \t%6s\t%6s\t%6s\t%6s\t%6s\t%6s\n','','Naive','','','Light','','');
    fprintf('%30s \t%6s\t%6s\t%6s\t%6s\t%6s\t%6s\t%6s\n','Variable','Mean','SD','N','Mean','SD','N','PMW');
    for(q=1:m)  % Now just print out all mean values with sd
        gn = ~isnan(data(:,q));
        p = pRank(q);                   % Same test as in the loop above; no need to recompute it
        fprintf('%30s \t%f\t%f\t%d\t%f\t%f\t%d\t%s\n',namesFlat{q},mean(data(gn & g1,q)),std(data(gn & g1,q)),sum(gn & g1),...
            mean(data(gn & g2,q)),std(data(gn & g2,q)),sum(gn & g2),myst(p));
    end
    
    figure('Color','white');
    nBest = ceil(sqrt(length(bag)));    % Side of the smallest square subplot grid that fits all significant variables
    for(q=1:length(bag))
        
        subplot(nBest,nBest,q);    % For normal analysis or debugging
        %figure('Color','white');    % For exporting to Illustrator
        
        % hold on;
        % plot(1+randn(size(data(g1,q)))*0.02,data(g1,q),'.b');
        % plot(2+randn(size(data(g2,q)))*0.02,data(g2,q),'.r');
        % hold off;        
        % set(gca,'FontSize',8,'XTick',[1 2],'XTickLabel',{'naive','disco'});
        % xlim([0 3]);
        values_and_averages3(concatenan(data(g1,bag(q)),data(g2,bag(q))),{'naive','disco'},20);        
        title(namesFlat{bag(q)});
    end
    
    %%% Now compare pairwise distances in 2D
    % Distances are Euclidean in the plane of the first two columns of fullScores.
    % The arrays are preallocated (n*(n-1)/2 pairs) and filled in the same pair order as before.
    gi = find(g1);
    pairwise_distances1 = zeros(length(gi)*(length(gi)-1)/2,1);
    iPair = 0;
    for(cell1=2:length(gi))
        for(cell2=1:cell1-1)
            iPair = iPair+1;
            pairwise_distances1(iPair) = ...
                sqrt((fullScores(gi(cell1),1)-fullScores(gi(cell2),1))^2 + (fullScores(gi(cell1),2)-fullScores(gi(cell2),2))^2);
        end
    end
    gi = find(g2);
    pairwise_distances2 = zeros(length(gi)*(length(gi)-1)/2,1);
    iPair = 0;
    for(cell1=2:length(gi))
        for(cell2=1:cell1-1)
            iPair = iPair+1;
            pairwise_distances2(iPair) = ...
                sqrt((fullScores(gi(cell1),1)-fullScores(gi(cell2),1))^2 + (fullScores(gi(cell1),2)-fullScores(gi(cell2),2))^2);
        end
    end
    fprintf('\n\n -------- 2D pairwise distances\n');
    figure; compare_columns(concatenan(pairwise_distances1(:),pairwise_distances2(:)),[],[],[],10); title('2D Pairwise');
    fprintf('\n -------- \n\n\n');
end


%%% ----------------------- Comparing cell diversity for naive vs. lightboxed (not only C1C2, but full)
if(flag_discoDiversity)
    % Compares cell-to-cell diversity (cosine distances across all variables of the normalized data)
    % between naive and lightboxed cells of stages 48-49. Missing values are filled by drawing at
    % random from the values observed for the same variable within the same group; this is repeated
    % in 20 independent runs per group and all resulting distances are pooled.
    %
    % NOTE: the group index lists used to live in a variable called SET, which shadowed the built-in
    % set() and broke every later set(gca,...) call in the same workspace. The CLEAR below still
    % removes such a leftover variable; the lists are now kept in cellSets.
    clear set cellSets iSet iExp iVar c1 c2 bag data2 possible;
    cellSets{1} = find((boxed==0) & ismember(stage,[48 49]));   % Naive old
    cellSets{2} = find((boxed==1) & ismember(stage,[48 49]));   % Lightboxed    
    for(iSet=1:2)                                               % First for naive, then for lightboxed
        bag{iSet} = [];        
        rows = cellSets{iSet};                                  % Row indices of the cells in this group
        for(iExp=1:20)                                          % Several independent experiments for each group
            fprintf('Cross-distances experiment %d for set %d\n',iExp,iSet);
            data2 = dataN;                                      % Make copy of the data (and used normalized data at that)
            if(length(rows)>=2)                                 % With fewer than 2 cells there are no pairs to compare
                for(iVar=1:length(namesFlat))                   % For each variable
                    missing = rows(isnan(data2(rows,iVar)));    % Cells of this group lacking this variable
                    if(isempty(missing))
                        continue;
                    end
                    % Donor values come from the NORMALIZED data, the same scale as data2
                    % (drawing them from raw DATA mixed two different scales in one matrix).
                    possible = dataN(rows,iVar);
                    possible = possible(~isnan(possible));      % All observed values
                    if(isempty(possible))
                        error('Variable %d has no observed values in set %d; cannot impute.',iVar,iSet);
                    end
                    % Each missing entry gets its own independent random draw
                    data2(missing,iVar) = possible(randi(length(possible),length(missing),1));
                end
            end
            temp = pdist(data2(rows,:),'cosine');               % possible distances: euclidean, cityblock, spearman, cosine, correlation
            bag{iSet} = [bag{iSet}; temp(:)];                   % Put new distances in the bag
        end
    end
    figure; compare_columns(concatenan(bag{1},bag{2}),{'Naive','Lightboxed'},'pub',[],50);
    fprintf('Means:\t%f\t%f\n',mean(bag{1}),mean(bag{2}));
    fprintf('SDs:\t%f\t%f\n',std(bag{1}),std(bag{2}));
    fprintf('Size:\t%f\t%f\n',length(bag{1}),length(bag{2}));
end


%%% ----------------------- Location analysis

if(flag_cell_locations)
    % Correlates every variable with the distance of the cell from a reference point [160 160]
    % (older cells only), plots the correlations flagged by fdr(), and compares this distance
    % between naive and stimulated cells.
    figure;
    plot(location(:,1),location(:,2),'o'); xlabel('X'); ylabel('Y'); title('Locations of all cells')
    
    rcdist = sqrt((location(:,1)-160).^2 + (160-location(:,2)).^2);
    % According to my estimations (GABA paper) the median side of the tectum is about 100 um from the midline,
    % and the top is about 200 um from the lip (rostrally). The "Center of old cells" to measure from is a bit
    % more towards the center of the OT though, with estimated location of [160 160].
    % (Based on the results of the GABA_POSITION_FINDER2 function)

    %localSet = true(size(rcdist)); % All cells
    localSet = stage>47;          % Only older cells

    clear r p g counter rByVar;
    
    fprintf('\n');
    counter = 0;
    p = nan(1,size(data,2));            % p-value per variable; NaN where there are too few points
    rByVar = nan(1,size(data,2));       % Correlation coefficient per variable (kept, so that reporting matches the variable)
    for(q=1:size(data,2))    
        g = ~isnan(rcdist) & ~isnan(data(:,q)) & localSet;        
        if(sum(g*1)>1)            
            [rByVar(q),p(q)] = corr(rcdist(g),data(g,q));                                
        end        
    end
    pp = fdr(p);
    for(q=1:size(data,2))
        if(pp(q)==1)
            % Rebuild the mask and look up r for THIS variable; previously both were leftovers
            % from the last iteration of the loop above, so n, r and the plotted points were wrong.
            g = ~isnan(rcdist) & ~isnan(data(:,q)) & localSet;
            r = rByVar(q);
            if(mod(counter,9)==0)
                figure('Color','white');
            end
            fprintf('%2d \t%20s - (%d)\t%4.2f\t%s\n',q,namesFlat{q},sum(g),r,myst(p(q)));                
            counter = counter+1;
            subplot(3,3,mod(counter-1,9)+1,'FontSize',8);
            % myUsefulPlot(rcdist(g),data(g,q),subset(g),'R-C distance',names{q},0);                
            hold on;
            plot(rcdist(g),data(g,q),'.');
            xlabel('R-C distance');
            ylabel(namesFlat{q});
            title(sprintf('r=%4.2f, p=%s',r,myst(p(q))));
            hold off;
        end
    end
    
    fprintf('Comparison of naive and stimulated cells in terms of RC distance:\n');
    figure; compare_columns(concatenan(rcdist(localSet & (boxed==0)),rcdist(localSet & (boxed==1))),{'Naive','Stimulated'})
end

%%% ----------------------- Prep age in hours and circadian stuff
if(flag_cell_time)
    % Two passes over all variables (older cells only): correlation with prepAge, and with
    % daytime*24+prepAge. Correlations flagged by fdr() are plotted with a linear fit.
    % Afterwards prepAge and recording time are compared across stage groups and across
    % naive / stimulated cells.
    %localSet = true(size(rcdist)); % All cells
    localSet = stage>47;          % Only older cells (was assigned to a misspelled name, leaving localSet stale or undefined)
    
    clear x p g counter xx poly i q r rByVar;
    
    currentX = {'Prep tiredness, h','Day time, h'};     % X-axis label for each pass
    for(i=1:2)
        switch i
            case 1                
                fprintf('\nTiredness of the prep analysis\n');
                x = prepAge;
            case 2
                fprintf('\nCircadian analysis\n');
                x = daytime*24+prepAge;
        end
        counter = 0;    
        p = nan(1,size(data,2));            % p-value per variable; NaN where there are too few points
        rByVar = nan(1,size(data,2));       % Correlation coefficient per variable
        for(q=1:size(data,2))    
            g = ~isnan(x) & ~isnan(data(:,q)) & localSet;        
            if(sum(g*1)>1)            
                [rByVar(q),p(q)] = corr(x(g),data(g,q));                                    
            end
        end
        pp = fdr(p);
        for(q=1:size(data,2))
            if(pp(q)==1)
                % Rebuild the mask and look up r for THIS variable; previously both were leftovers
                % from the last iteration of the loop above.
                g = ~isnan(x) & ~isnan(data(:,q)) & localSet;
                r = rByVar(q);
                if(mod(counter,6)==0)
                    figure('Color','white');
                end
                fprintf('%2d \t%20s - (%d)\t%4.2f\t%s\n',q,namesFlat{q},sum(g),r,myst(p(q)));                
                counter = counter+1;
                subplot(2,3,mod(counter-1,6)+1,'FontSize',8);
                % myUsefulPlot(rcdist(g),data(g,q),subset(g),'R-C distance',names{q},0);                
                hold on;
                plot(x(g),data(g,q),'g.');
                xlabel(currentX{i});
                ylabel(namesFlat{q});
                poly = polyfit(x(g),data(g,q),1);       % Straight-line fit, drawn over the data range
                xx = [min(x(g)) max(x(g))];
                plot(xx,polyval(poly,xx),'k-');
                title(sprintf('r=%4.2f, p=%s',r,myst(p(q))));
                hold off;
            end
        end
    end
    
    clear stageGroup stageName i bagTired bagDaytime;
    stageGroup = {[42 43 44],[45 46],[47],[48 49]};                % Simplified staging
    stageName = {'43-44','45-46','47','48-49'};    
    for(i=1:length(stageGroup))
        bagTired{i} = prepAge(ismember(stage,stageGroup{i}));
        bagDaytime{i} = daytime(ismember(stage,stageGroup{i}))*24 + prepAge(ismember(stage,stageGroup{i}));
    end
    fprintf('------- Comparing prep ages across stages:\n');
    figure; compare_columns(bagTired,stageName); ylabel('Prep age, h');
    fprintf('------- Comparing days of time across stages:\n');
    figure; compare_columns(bagDaytime,stageName); ylabel('Time of recording, h');
    fprintf('------- Comparing prep ages across experimental sets:\n');
    figure; compare_columns(concatenan(prepAge(localSet & (boxed==0)),prepAge(localSet & (boxed==1))),...
        {'Naive','Stimulated'}); ylabel('Prep age, h');
    fprintf('------- Comparing days of time across experimental sets:\n');
    figure; compare_columns(concatenan(daytime(localSet & (boxed==0)),daytime(localSet & (boxed==1)))*24 + ...
        concatenan(prepAge(localSet & (boxed==0)),prepAge(localSet & (boxed==1))),...
        {'Naive','Stimulated'}); ylabel('Time of recording, h');
end


%%% ----------------------- FDR for correlations (everything vs everything)

%%% Test bench for correlations between individual pairs of data:
if(0)
    % Scratch area (disabled): scatter of one hand-picked pair of variables within SUBSET,
    % control cells in black and disco cells in red, with Pearson and Spearman statistics in the title.
    i1 = 6;
    i2 = 17;
    isControl = subset & (boxed==0);
    isDisco   = subset & (boxed==1);
    figure('Color','white'); 
    hold on;
    plot(data(isControl,i1),data(isControl,i2),'k.'); 
    plot(data(isDisco,i1),data(isDisco,i2),'r.'); 
    legend({'Control','Disco'});
    hold off;
    xlabel(namesFlat(i1));
    ylabel(namesFlat(i2)); 
    % Rows of the subset where both variables are present
    g = subset & ~isnan(data(:,i1)) & ~isnan(data(:,i2));
    xPair = data(g,i1);
    yPair = data(g,i2);
    [temp_rho_pearson,temp_p_pearson] = corr(xPair,yPair);
    [temp_rho_spearman,temp_p_spearman] = corr(xPair,yPair,'Type','Spearman');
    title(sprintf('Pearson Rho: %4.2f ; p=%s . Spearman: rho: %4.2f ; p=%s ',...
        temp_rho_pearson,myst(temp_p_pearson),temp_rho_spearman,myst(temp_p_spearman)));
end

if(flag_correlations)
    % Pearson and Spearman correlations for every pair of variables (within SUBSET), followed by
    % fdr() on each family of p-values, a printed table sorted by Spearman p-value, optional
    % scatter plots of the Pearson-FDR-positive pairs, and two circos plots.
    % Leaves matRho / matPval / matFdr (and their ...S Spearman counterparts) as symmetric
    % nVars-by-nVars matrices in the workspace.
    clear ip corrvars matPval matRho matFdr i1 i2 ip g p rho;
    clear ps rhos ncount;   % Were never cleared: a longer leftover vector would break the sort/indexing below
    % First - calculate all correlations
    ip = 0;
    nPairs = nVars*(nVars-1)/2;     % Number of distinct variable pairs
    corrvars = zeros(nPairs,2);     % To make the things easy - here I'll store variables that were compared at each step
    ncount = zeros(1,nPairs);       % Number of usable data points per pair
    p  = nan(1,nPairs);     rho  = nan(1,nPairs);   % Pearson; NaN where correlation could not be calculated
    ps = nan(1,nPairs);     rhos = nan(1,nPairs);   % Spearman
    matPval = nan(nVars);   % Matrix of correlation p-values
    matRho  = nan(nVars);   % Matrix of correlation rho values rho-values
    matFdr  = nan(nVars);
    matPvalS = nan(nVars); matRhoS = nan(nVars); matFdrS = nan(nVars); % Spearman equivalents

    for(i1=2:nVars)
        for(i2=1:i1-1)
            ip = ip+1;
            g = ~isnan(data(:,i1)) & ~isnan(data(:,i2)) & subset;   % Good datapoints within the subset of interest
            ncount(ip) = sum(g*1);
            corrvars(ip,:) = [i1 i2];                               % Now row #ip will always store my is
            if(ncount(ip)<=2)                                       % If Not possible to calculate correlation
                fprintf('\n');                                      % p, rho, ps, rhos stay NaN for this pair
            else            
                [rho(ip),p(ip)] = corr(data(g,i1),data(g,i2));  % Pearson
                [rhos(ip),ps(ip)] = corr(data(g,i1),data(g,i2),'type','Spearman');
            end
            matPval(i2,i1) = p(ip);         matRho(i2,i1) = rho(ip);
            matPvalS(i2,i1) = ps(ip);       matRhoS(i2,i1) = rhos(ip);
        end
    end

    % Then run FDR
    fdrArray = fdr(p);      fdrpositive = find(fdrArray);
    fdrSArray = fdr(ps);     fdrSpositive = find(fdrSArray);
    [~,ind] = sort(ps,2,'descend');     % Print order: by Spearman p-value, largest first

    % And now print the results
    if(flag_individual_correlations)
        figure;
        plotN = 3;
        plotM = 5;
        subploti = 0;
    end
    fprintf('\n%2s \t%2s \t%20s\t %30s\t(%3s) \t%5s \t%s \t%s\n','i1','i2','name1','name2','n','r','p','FDR');
    for(q=1:length(p))
        ip = ind(q);
        i1 = corrvars(ip,1); i2 = corrvars(ip,2);
        fprintf('%2d \t%2d \t%35s\t %35s\t%3d ',i1,i2,namesFlat{i1},namesFlat{i2},ncount(ip));
        if(~isnan(p(ip)))
            isFdrPearson  = ismember(ip,fdrpositive);
            isFdrSpearman = ismember(ip,fdrSpositive);
            %fprintf('r = %5.2f; tp = %s; FDR %d',rho(ip),myst(p(ip)),ismember(ip,fdrpositive));
            fprintf('\t%5.2f \t%s \t%d \t%5.2f \t%s \t%d',rho(ip),myst(p(ip)),isFdrPearson,rhos(ip),myst(ps(ip)),isFdrSpearman);
            matFdr(i2,i1) = isFdrPearson;
            matFdrS(i2,i1) = isFdrSpearman;
            if(isFdrPearson && flag_individual_correlations) % If you want to see pictures of individual correlation plots
                % Same points as used for the correlation above (restricted to SUBSET), so that the
                % scatter matches the r and p shown in its title.
                g = ~isnan(data(:,i1)) & ~isnan(data(:,i2)) & subset;   
                subploti = subploti+1;
                if(subploti>plotN*plotM)    % Time to start a new figure
                    figure;
                    subploti = 1;
                end
                subplot(plotN,plotM,subploti); hold on;
                plot(data(g,i1),data(g,i2),'.');
                mypoly = polyfit(data(g,i1),data(g,i2),1);
                plot([min(data(g,i1)) max(data(g,i1))],polyval(mypoly,[min(data(g,i1)) max(data(g,i1))]),'-','Color',[.8 .8 1]);
                xlabel(names{i1},'FontSize',8);
                ylabel(names{i2},'FontSize',8);
                title(sprintf('r = %5.2f; p = %s',rho(ip),myst(p(ip))));            
                hold off;
                set(gca,'FontSize',8);
            end
        end
        fprintf('\n');
    end
    
    %%% ---------- Circos - plot
    % FDR (previous section) should be run immediately before this one.

    % First - let's order the variables properly
    % Variables with negative comps(:,1) get atan-pi, those with positive comps(:,1) get atan+pi,
    % so after sorting the negative-side variables come first and each side is ordered by atan(c2/c1).
    variableAngle = atan(comps(:,2)./comps(:,1))+pi*sign(comps(:,1));                   % Radar-like spanning through the loads plot
    [~,angInd] = sort(variableAngle);

    %figure; hold on; enum(comps(angInd,2),comps(angInd,1)); hold off; error(); % Debugger

    % Only the upper triangles were filled above; NaN -> 0 and adding the transpose mirrors them
    matPval(isnan(matPval)) = 0;    matPval = matPval+matPval';                         % Make symmetric - just in case
    matRho(isnan(matRho)) = 0;      matRho = matRho+matRho';
    matFdr(isnan(matFdr)) = 0;      matFdr = matFdr+matFdr';
    matPvalS(isnan(matPvalS)) = 0;    matPvalS = matPvalS+matPvalS';    % Spearman equivalents
    matRhoS(isnan(matRhoS)) = 0;      matRhoS = matRhoS+matRhoS';
    matFdrS(isnan(matFdrS)) = 0;      matFdrS = matFdrS+matFdrS';
    circos(namesFlat(angInd),matRho(angInd,angInd),matFdr(angInd,angInd),[],1.2);
    circos(namesFlat(angInd),matRhoS(angInd,angInd),matFdrS(angInd,angInd),[],1.2);
    % circos(namesFlat(angInd),matRho(angInd,angInd),matFdr(angInd,angInd),find(angInd==17));       % Sub-projection of correlations
end
    
%%% ---------- Interesting correlations
if(flag_individualCorrs)
    % Scatter plots for a hand-picked list of variable pairs (within SUBSET), 3x3 per figure.
    % A regression line is added for pairs marked in matFdr, which is produced by the
    % flag_correlations section and must therefore exist in the workspace.
    clear q pairs g p pval r x y px pn pm counter tx ty T1 T2 Tick;
    if(~exist('matFdr','var'))
        error('Run the correlations section (flag_correlations) before plotting individual correlations');
    end
    pairs = [6 8 ; ...  % INa to IKs
        6 10; ...       % INa to IKt
        6 13; ...       % INa to Spike amp
        1 32; ...       % Cm to Minis freq
        17 13; ...      % N spikes step to Spike amp
        17 15; ...      % N spikes step to Spike width
        17 25; ...      % N spikes step to Jitter
        17 24; ...      % N spikes to Wave decay
        2 31; ...       % Rm to Monosynapticity
        5 7; ...        % Na thresh to Ks thresh
        5 9; ...        % Na thresh to Kt thresh
        28 33; ...      % Synaptic charge to minis amp
        28 32];         % Synaptic charge to minis freq

    % Most of strongest correlations are rather obvious (2 different measures for N spikes; 2 different measures for spike width). 
    % Cm-MinisFrequency is less obvious; INa-IKs is not as obvious as well (even if explainable). 
    % Negative correlation between N spikes and spike shape is obvious-ish, but can be mentioned, 
    % strong negative N spikes vs Jitter is probably interesting enough to be presented. Also this lonely horizontal positive correlation between 
    % "Spike-output linearity" and "Spiking resonance": cells that don't inactivate their spiking prefer slow oscillations of their membrane voltage, 
    % while quickly inactivating cells prefer really fast injections.

    % Of non-existent correlations it's fun that total synaptic charge correlates with nothing, including all minis and synaptic measures. 
    % It's also interesting that transient potassium currents (in pA) correlate so poorly with the rest of the set, including slow K and Na currents 
    % (which correlate really strongly between themselves). Also it's fun that ionic thresholds don't correlate with each other.
    
    pn = 3;    pm = 3;  % Subplotting dimensions
    counter = 0;
    for(q=1:size(pairs,1))  % For each interesting pair
        if(mod(counter,pn*pm)==0)
            figure('Color','white');
        end
        counter = counter+1;
        subplot(pn,pm,mod(counter-1,pm*pn)+1);
        g = subset & ~isnan(data(:,pairs(q,1))) & ~isnan(data(:,pairs(q,2)));
        x = data(g,pairs(q,1));
        y = data(g,pairs(q,2));
        % NOTE(review): the two clippings below happen BEFORE corr(), so the r and p shown in the
        % title are computed on the clipped values - confirm this is intended.
        if(pairs(q,1)==28)      % Specially for total synaptic current: bring outliers down
            x = min(x,49000);
        end
        if(pairs(q,2)==9)
            y = max(y,-40);
        end
        plot(x,y,'kx');
        set(gca,'FontSize',8);
        xlabel(namesFlat{pairs(q,1)});
        ylabel(namesFlat{pairs(q,2)});
        [rho,pval] = corr(x,y);
        title(sprintf('r=%4.2f, p=%s, n=%d',rho,myst(pval),length(x)));        
        if(matFdr(pairs(q,1),pairs(q,2)))   % Regression line only for pairs marked in matFdr
            p = polyfit(x,y,1);
            hold on;
            px = [min(x) max(x)];
            plot(px,polyval(p,px),'b-');
            hold off;
            [T1, T2, Tick] = tick_value(min(y), max(y), 3);            
            set(gca,'YLim',[T1 T2],'YTick',T1:Tick:T2);
        end
    end
end


%%% ----------------------- Compare age to discoboxing  -----------------------
if(flag_ageVsDisco)
    % For every variable: share of variance (SS groups / SS total from one-way ANOVA) explained by
    % developmental stage among naive cells, and by boxing among cells of stages 48-49.
    % The two shares, signed by signAge and signDisco, are plotted against each other.
    % Requires the age analysis (gUp, signAge) and the discoboxing comparisons (signDisco) to have run.
    clear ap atab astats g iVar varExplainedStage varExplainedBoxed apStage apLight;    % (two names used to be fused into one here)
    stageRange = 0;
    stageRange(42:49) = [43 43 43 45 45 47 49 49];          % Rough approach to ages
    for(iVar=1:length(names))    
        g = ~isnan(data(:,iVar));                                % Good indices
        g0 = g & (boxed==0);                                     % naive
        g1 = g & (stageRange(stage)==49)';                       % All cells of stages 48-49 (naive and boxed), to be split by BOXED below
        [apStage(iVar),atab,astats] = anova1(data(g0,iVar),stageRange(stage(g0)),'off');
        varExplainedStage(iVar) = atab{2,2}/atab{4,2};                % Ratio of SS groups to SS total
        [apLight(iVar),atab,astats] = anova1(data(g1,iVar),boxed(g1),'off');
        varExplainedBoxed(iVar) = atab{2,2}/atab{4,2};                % Ratio of SS groups to SS total        
    end
    
    figure('Color','white'); hold on;
    % subplot(1,2,1);
    % enum(ones(size(varExplainedStage)),varExplainedStage,names);
    % subplot(1,2,2);
    % enum(ones(size(varExplainedBoxed)),varExplainedBoxed,names);
    if(1)   % Signed
        if(~exist('gUp','var') || ~exist('signAge','var'))
            error('Run full age analysis before comparing age to disco');
        end        
        if(~exist('signDisco','var'))
            error('Run discoboxing comparisons (flag_discoPics) before comparing age to disco');
        end
        ny = varExplainedStage.*signAge;
        nx = varExplainedBoxed.*signDisco;
        % nx = tanh(nx*7)/7;        ny = tanh(ny*10)/10;    % Nice but impossible to explain transformation
    else
        ny = varExplainedStage;
        nx = varExplainedBoxed;        % Absolute values (no sign - not as nice)
    end
    %plot(nx,ny,'.'); 
    enum(nx,ny,namesFlat);
    plot(nx(apStage<0.05),ny(apStage<0.05),'o','MarkerEdgeColor','none','MarkerFaceColor','red');                      % Significant by stage
    plot(nx(apLight<0.05),ny(apLight<0.05),'o','MarkerEdgeColor','blue','MarkerFaceColor','none','MarkerSize',10);     % Significant by boxing
    % set(gca,'XScale','log','YScale','log');
    ylabel('Change with age'); xlabel('Change after overstimulation');
    set(gca,'DataAspectRatio',[1 1 1]);
    hold off;
    mygrid(gca);    
end

if(flag_pattern_by_group)   %%% ----------------- Variation explained per group of cells
    % Part 1: how much of each group's variance (naive cells per stage group, then stimulated cells)
    %         is retained by the global PCA reconstruction (proxy).
    % Part 2: a separate local PCA (pca_full) for each of the same groups, each on data
    %         re-normalized within the group.
    % comps scores proj fullscores V 
    % var(comps) = eigenvalues
    proxy = (comps*fullScores')';        % PCA-based proxy values
    absVarExplained = sum(var(proxy))/33;% PCA-based calculation. NOTE(review): 33 is presumably the number of variables - confirm
    relativeVarExplained = 1-V;          % Share, from the PCA output. I'm not sure why var reported by PCA is slightly larger than the one I observe;
                                         % I assume that's because of the "imputation": NAN values are part of PCA-calculation, but aren't part of my var(dataN)
    scaleFactor = relativeVarExplained/absVarExplained;
    fprintf('Full set, var explained: %3.2f\n',sum(var(proxy))/33*scaleFactor);

    stageGroups = {[42 43 44],[45 46],[47],[48 49]};                % Simplified staging
    stageNames = {'43-44','45-46','47','48-49'};
    stagex = [43.5 45.5 47 48.5]-43;
    stageRange([43 44 45 46 47 48 49]) = [43 43 45 45 47 49 49];    % for ANOVA
    [n,m] = size(dataN);
    for(iStage=1:4)
        g = ismember(stage,stageGroups{iStage}) & (boxed~=1);
        localVarData = 0;        
        for(iVar=1:m)                   % Total variance of the group = sum of per-variable variances over non-missing values
            localVarData = localVarData + var(dataN(g & ~isnan(dataN(:,iVar)),iVar));
        end
        localVar = sum(var(proxy(g,:)));
        fprintf('Naive, stage %s;\t var before: %5.2f;\t var after: %5.2f;\t var explained: %3.2f\n',stageNames{iStage},...
            localVarData, localVar, localVar/localVarData*scaleFactor);
    end
    %%% And now the same for lightboxed data:
    g = (boxed==1);
    localVarData = 0;                   % Start from zero; without this the sum continued on top of the last stage group's value
    for(iVar=1:m)
        localVarData = localVarData + var(dataN(g & ~isnan(dataN(:,iVar)),iVar));
    end
    localVar = sum(var(proxy(g,:)));
    fprintf('Stimulated;     \t var before: %5.2f;\t var after: %5.2f;\t var explained: %3.2f\n',...
            localVarData, localVar, localVar/localVarData*scaleFactor);
        
        
    %%% Part 2 - individual PCAs
    fprintf('\nLocal 2-component Bayesian PCAs:\n');
    figure;
    for(iStage=1:4)
        g = ismember(stage,stageGroups{iStage}) & (boxed~=1);
        dataLocN = dataN(g,:);
        for(iVar=1:m)                   % Re-normalize within the group: zero mean, unit SD over non-missing values
            dataLocN(:,iVar) = dataLocN(:,iVar)-mean(dataLocN(~isnan(dataLocN(:,iVar)),iVar));
            dataLocN(:,iVar) = dataLocN(:,iVar)/std(dataLocN(~isnan(dataLocN(:,iVar)),iVar));
        end
        [ Aloc, Sloc, Muloc, Vl, ~, ~, ~ ] = pca_full(dataLocN',nComps,'verbose',0);        
        %%% X = Mu + AS, which makes comps=A and scores=S'
        fprintf('Naive, stage %s;\t var explained: %3.2f\n',stageNames{iStage},1-Vl);
        subplot(2,2,iStage);
        plot(Sloc(1,:),Sloc(2,:),'.');
        drawnow();
    end
    g = (boxed==1);
    dataLocN = dataN(g,:);
    for(iVar=1:m)
        dataLocN(:,iVar) = dataLocN(:,iVar)-mean(dataLocN(~isnan(dataLocN(:,iVar)),iVar));
        dataLocN(:,iVar) = dataLocN(:,iVar)/std(dataLocN(~isnan(dataLocN(:,iVar)),iVar));
    end
    % Use the locally re-normalized data, exactly as for the stage groups above
    % (the call used to receive dataN(g,:), leaving dataLocN computed but unused).
    [ ~, ~, ~, Vl, ~, ~, ~ ] = pca_full(dataLocN',nComps,'verbose',0);  
    fprintf('Stimulated;     \t var explained: %3.2f\n',1-Vl);
end